******************************************************************************************************************************************************
*This do file extracts, cleans and appends the 2007-2011 PCI datasets.	
*	a.	Input files: 
*		i.	PCI2007_Web.dta
*		ii.	PCI2008_Web.dta
*		iii.PCI2009_Web.dta
*		iv.	PCI2010_Web.dta
*		v.	PCI2011_Web.dta
*		vi.	ISIC_2007.dta-ISIC_2011.dta
*	b.	Output files:
*		i.	PCI_repeated_cross_CLEAN.dta													   																	    *
******************************************************************************************************************************************************

clear all 
set mem 600m
set more off

*SET DIRECTORY HERE*
global dir_data_original /Users/Jie/Dropbox (Personal)/CorruptionIncome/EJFinalSubmission/data&program/original raw data/
global dir_data_coded /Users/Jie/Dropbox (Personal)/CorruptionIncome/EJFinalSubmission/data&program/intermediary data/


/*****************************************PCI 2007**********************************************************************************************/

use "${dir_data_original}/PCI/PCI2007_Web.dta", clear

*DROP UNNECESSARY VARIABLES, CLEAN/RENAME EXISTING VARIABLES*
rename province_id pci_id
replace a11_3=4 if a11_3==5
replace a11_3=5 if a11_3>5 & a11_3 !=.
label define a12_3_new 1 "large losses" 2 "small losses" 3 "broke even" 4 "small profits" 5 "profits as planned"
label values a11_3 a12_3_new

*variable that indicates firm using more than 50% of household property or more than 50% purchased from another party
g ownland=1 if  b2a>50 & b2a != .| b2b>50 & b2b !=.
replace ownland=0 if ownland==.
replace ownland=0 if ownland==.

*variable that indicates firm rents more than 50%
g renter = 1 if b2c > 50 & b2c != . | b2d >50 & b2d !=.
replace renter=0 if renter==.

*leasing-industrial zone variable
g iz=1 if  b5_1_4==1
replace iz=0 if  b5_1_4==0

drop  id_2006 ansdate ttc7 ttc8 a11_1 a11_2 a12 b2 a7 b3  b3_1 b3_2 b3_3 b3_4 b4_1_3 b5_1 b5_1_1 b5_1_2 b5_1_3 b5_1_4 b5_1_5 ///
	  c5 c6 d2 d5 d6 e1_4 e1_5 e1_6 e1_8 e1_9 e1_10 e1_11 e1_13 e1_14 e1_15 f1_3 f1_4 f1_12 f4 f7 f9 f9_1 f9_2 f9_3 f10 f10_1 f10_2 f10_3 f11 ///
	  g2 g9_3 h2 h3 h4 h6* h7_5 h7_6 h7_7 h7_8 h7_13

*CREATE YEAR VARIABLE*
g year=2007 
order year pci_id

tempfile pci2007
save `pci2007',replace


/*****************************************PCI 2008**********************************************************************************************/

use "${dir_data_original}/PCI/PCI2008_Web.dta", clear

*DROP UNNECESSARY VARIABLES, CLEAN/RENAME EXISTING VARIABLES*
drop  id_2006 id_2007 ansdate ttc7 ttc8 ttc9_2005 ttc9_2006 ttc9_2007 a4 a5_1 a7* a9_4 a12 a12_1 b2
replace a11_3=4 if a11_3==5
replace a11_3=5 if a11_3>5 & a11_3 !=.
label define a12_3_new 1 "large losses" 2 "small losses" 3 "broke even" 4 "small profits" 5 "profits as planned"
label values a11_3 a12_3_new

*variable that indicates firm using more than 50% of household property or more than 50% purchased from another party
g ownland=1 if  b2a>50 & b2a != . | b2b>50 & b2b !=.
replace ownland=0 if ownland==.

*variable that indicates firm rents more than 50%
g renter=1 if b2c>50 & b2c != . | b2d>50 & b2d != .
replace renter=0 if renter==.

*leasing-industrial zone variable
generate iz=1 if  b5_1_4==1
replace iz=0 if  b5_1_4==0

drop b3* b4_1_3 b5_1 b5_1_1 b5_1_2 b5_1_3 b5_1_4 b5_1_5 b4_3 b4_3_1 b4_3_2 c4_1 c5 c6 d2 d6 ///
	 e1_4 e1_5 e1_6 e1_8 e1_9 e1_10 e1_11 e1_13 e1_14 e1_15 e1_16 e1_17 e1_18 e1_19 e1_20 ///
	 e2_4_1 e4 e5 e6 e7 e8 f1_12 f1_14 f1_15 f4* f8_3 f8_4 f8_4_1 f9 f9_1 f9_2 f9_3 f10 f10_1 f10_2 f10_3 f11 ///
	 g2 g6 g10* g11 g12 g12_1 h2 h8* h9* h10* h7_15 h7_16 h7_5 h7_6 h7_7 h7_8 h7_14 

*CREATE YEAR VARIABLE*
g year=2008
rename province_id pci_id
order year pci_id
sort pci_id year

tempfile pci2008
save `pci2008',replace


/*****************************************PCI 2009**********************************************************************************************/

use "${dir_data_original}/PCI/PCI2009_Web.dta", clear

*DROP UNNECESSARY VARIABLES*
drop  iid i1 i2 i3 i4 online  X _PRIV _PRIV sum_nonstate  percentage_nonstate established_postelaw sector plans customer ///
oss c5_1month c5_3month LURCwait_new ownland LURC expropriation_risk fair_compensation landprice no_landproblems d6_new ///
par1_capacity par2_transcosts par3_paper par4_fees par6_nothing d10_new d11_new d12_new d13_new d14_2_new legal planning ///
planning_avg legal_avg f2_new d14_3_new f3_new f4_new f5_new f5_1_new f7_new f8_new province web_08 web_09 change_web WEB ///
h22_new h23_new h24_new h21_new bad_implementation h1_new d14_new h25_new h26_new h26_new h31_new h32_new h33_new h34_new ///
h4_new h411_new h412_new h413_new h413_new h414_new h5_new appeal propertyrt used_legal g3_2_imp g3_3_imp enforced ///
enforced_imp use_exchange private_exchange use_again e18_new e18_new no_ent no_emp income salary _merge ln_avg_priv_salary /// 
salary _merge ln_avg_priv_salary ln_noent use_again_resid e8_resid e9_resid satisfied_labor _Isector_2 _Isector_2 _Isector_3 _Isector_4 _Ia4_2 ///
_Ia4_3 _Ia4_4 _Ia4_5 _Ia4_6 _Ia4_7 e711_use e711_priv e721_use e721_use e721_priv e722_continue e741_use e741_priv e742_continue e751_use ///
e751_priv e752_continue e761_use e761_priv  nonstate_ent09 h27_new e17_new e1_8_new e712_continue e762_continue a11

drop  id2006 id2007 id2008  type prov answerdate position aid
drop  bid cid did eid fid gid hid

/*Section A*/
/*A1, A2, and A3 can remain the same*/
/*Name of A4 (Legal form must change)*/
rename a4 a5
replace a5=5 if a5>4 & a5 != .

drop  a4_1 a6* a12

rename a5_1 a6_1
rename a5_2 a6_2
rename a5_3 a6_3
rename a5_4 a6_4

foreach var in a6_1 a6_2 a6_3 a6_4 { 
	replace `var' = 2 - `var' 
} 

/*Rename Equity and Employment Variables*/
rename a8_1 a9_1
rename a8_2 a9_2
rename a8_3 a9_3
rename a7_1 a8_1
rename a7_2 a8_2
rename a7_3 a8_3

*CREATE NEW INVESTMENT PLAN VARIABLE 
/*Investment Plans*/
generate a10=1 if  a10_6==1 
replace  a10=2 if  a10_5==1 
replace  a10=3 if  a10_4==1  
replace  a10=4 if  a10_3==1 
replace  a10=5 if  a10_2==1 
replace  a10=6 if  a10_1==1 
drop  a10_1 a10_2 a10_3 a10_4 a10_5 a10_6 

/*Profitability*/
rename a9 a11_3  

*RE-STRUCTURE VARIABLES SO YES = 1, NO = 0 

/*Firm History*/
foreach x of varlist a13* a14*{
replace `x' =2-`x' 
}

rename a13_1 h5_1
rename a13_2 h5_2
rename a13_3 h5_9
rename a13_4 h5_3
rename a13_5 h5_4
rename a14_3 h5_5
replace h5_5=1 if a14_4==1
rename a14_5 h5_6
rename a14_6 h5_7
rename a14_1 h5_10
rename a14_2 h5_11
drop a14_4

/*Major Customer*/
*Create variable for % of establishment sold domestically to SOEs & state agencies
generate a13_1=a11_a+a11_b
drop a11_a a11_b
rename a11_c a13_2
rename a11_d a13_3
rename a11_e a13_4
rename a11_f a13_5

/*Land*/
generate ownland=1 if  b2_1==1|b2_2==1|b2_3==1
replace ownland=0 if ownland==.
generate renter=1 if b2_4==1|b2_5==1
replace renter=0 if renter==.
generate iz=1 if  b3==1
replace iz=0 if  b3==2
rename   b4_1 b4_1_1
generate b4_1=2-b4
drop b4
rename   b4_2 b4_1_2
rename  b4_3 b4_1_4
rename  b4_4 b4_1_5
drop b2* b3* b5 b6* b7*

/*Time Costs*/
drop c3 c5_ c6*
drop c2*

rename c4 c3
rename c5 c4
drop  d5

rename  d2_1 d6_1
rename  d2_2 d6_2
rename  d2_3 d6_3
drop g1* g2* g3* g4*
rename  d6 g1
drop d7* d8* d9*

/*Informal Charges*/
rename d10 g3
rename  d11 g4
rename g5 h7_9
rename  d12 g5
rename  d13 g7
rename g6 g8
rename  d14_1 g9_1
rename  d14_2 g9_2
rename  d14_3 g9_4

/*Services*/
drop  e1_3 e1_4  e1_6 
rename e1_7 e1_3
rename e1_5 e1_7
rename e1_8 e1_12
drop  e2 e3* e4 e5 e6 e7_1_1 e7_1_2 e7_2_1 e7_2_2 e7_3_1 e7_3_2 e7_4_1 e7_4_2 e7_5_1 e7_5_2 e7_6_1 e7_6_2

/*Access to Finance*/
rename e11 e2
replace e2 = 2 - e2
generate e2_4=1 if  e11_1_1==1
replace e2_4=2 if  e11_1_2==1
replace e2_4=3 if  e11_1_3==1
replace e2_4=.b if e2_4==.
drop  e11_1_1 e11_1_2 e11_1_3 e11_1_3_1
rename e11_2 e2_1
rename  e11_3 e2_3
drop  e11_4 e11_4_1
rename e12_1 e3_1
rename e12_2 e3_2
rename e12_3 e3_3
rename e12_4 e3_4
drop  e8 e9 e10

/*Transparency*/
drop  f1_12 f1_13
rename f1_11 f1_13
drop  f1_10
rename f1_9 f1_11
rename f1_8 f1_10
rename  f1_7 f1_9
rename  f1_6 f1_8
rename f1_5 f1_7
rename  f1_4 f1_6
rename f1_3 f1_5
generate f8_1=2-f5

drop  f2_1_1 f2_1_2 f2_1_3
drop  f4_1 f4_1_6 f5 f5_2 f5_1 f6

rename f7 f5
rename f8 f6
drop  f9 f10

generate mobile=1 if h6 !=""
replace mobile=0 if h6==""
lab var mobile "Firm listed a province other than home as possible investment location"

rename  h2_1 h7_1
rename  h2_2 h7_2
rename  h2_3 h7_3
rename  h2_4 h7_4
rename  h2_5 h7_12
rename h2_6 h7_11
rename h4 h7_10
drop  h2_7 h3_1 h3_2 h3_3 h3_4 h3_5 h3_5 h3_5_1 h4_1_1 h4_1_2 h4_1_3 h4_1_4 h4_1_5 h4_1_5_1 h5 

*CREATE YEAR VARIABLE 
generate year=2009
order year pci_id
sort pci_id year

tempfile pci2009
save `pci2009',replace


/*****************************************PCI 2010**********************************************************************************************/

use "${dir_data_original}/PCI/PCI2010_Web.dta", clear

*DROP UNNECCESSARY VARIABLES 
drop a11 b2_7 g11 percentage_no_ent established_postelaw sector plans a11_c customer customer c1_new c2_new ///
oss c4_2010_new c4_new c5_1month c5_3month LURCwait_new ownland LURC expropriation_risk fair_compensation landprice ///
no_landproblems no_landproblems2 legal planning planning_avg legal_avg f2_new f4_new f5_new f5_1_new d14_3_new f5_new ///
f5_1_new f7_new f8_new web09 web10 WEB d6_new par1_capacity par2_transcosts par3_paper par4_fees par6_nothing d10_new ///
d11_new d12_new d13_new d13_new d14_2_new  c6_2010_new d14_2010_new c6_2010a c6_2010b d14_2010_new c6_2010a c6_2010b ///
d14_2010a d14_2010b h22_new h24_new h23_new h24_new h21_new bad_implementation h1_new e711_use e711_priv e721_use e721_priv ///
e731_use e741_use e741_use e741_priv e751_use e715_continue e725_continue e735_continue e745_continue e755_continue e765_continue ///
use_exchange private_exchange private_exchange use_again e17_new e1_8_new no_ent income no_ent no_emp income salary _NOENT ///
ln_avg_priv_salary ln_noent use_again_resid use_again_resid e8_resid e9_resid satisfied_labor _Isector_2 _Isector_3 ///
_Isector_4 _Isector_5 _Ia4_2 _Ia4_2 _Ia4_4 _Ia4_5 _Ia4_6 _Ia4_7 _Ia4_6 _Ia4_7 appeal propertyrt used_legal g3_2_imp ///
g3_3_imp enforced enforced enforced_imp region invest_size pop09 e1_1_new e1_2_new e1_3_new e1_4_new e1_5_new e1_6_new ///
infra_quality e2_impute e3_impute e4_impute e5_impute e6_impute email_dich online

drop position answerdate a_id b_id c_id d_id e_id f_id g_id h_id

/*Name of A4 (Legal form must change*/
rename a4 a5
replace a5=5 if a5>4 & a5 != . 
drop  a4_1
drop a6*

rename a5_1 a6_1
rename a5_2 a6_2
rename a5_3 a6_3
rename a5_4 a6_4

drop  a8_1_1 a8_1_2 a8_1_3

/*Rename Equity and Employment Variables*/
rename a8_1 a9_1
rename a8_2 a9_2
rename a8_3 a9_3

rename a7_1 a8_1
rename a7_2 a8_2
rename a7_3 a8_3

*SWITCH RANKING OF INVESTMENT PLANS 
/*Investment Plans*/
replace a10 = 7-a10

/*Profitability*/
rename a9 a11_3

/*Firm History*/
#delimit cr
rename a13_1 h5_1
rename a13_2 h5_2
rename a13_3 h5_9
rename a13_4 h5_3
rename a13_5 h5_4
rename a14_3 h5_5
replace h5_5=1 if a14_4==1
rename a14_5 h5_6
rename a14_6 h5_7
rename a14_1 h5_10
rename a14_2 h5_11
drop a14_4

/*Major Customer*/
generate a13_1=a11_a+a11_b
drop a11_a a11_b
rename a11_c_new a13_2
rename a11_d a13_3
rename a11_e a13_4
rename a11_f a13_5

/*Land*/
generate ownland=1 if  b2_1==1|b2_2==1|b2_3==1|b2_6==1
replace ownland=0 if ownland==.
generate renter=1 if b2_4==1|b2_5==1
replace renter=0 if renter==.
generate iz=1 if  b3==1
replace iz=0 if  iz==.
rename   b4_1 b4_1_1
rename b4 b4_1
rename   b4_2 b4_1_2
rename  b4_3 b4_1_4
rename  b4_4 b4_1_5
drop b2* b3* b5 b6 b7*

/*Time Costs*/
drop c3 c6
replace c4=c4 if c4_2010==1
replace c4=0 if c4_2010==0
drop  c4_2010 c4_2_1 c4_2_2 c4_2_3 c4_2_4 c4_3
rename c4 c3
rename c5 c4
drop d2 d5
rename  d2_1 d6_1
rename  d2_2 d6_2
rename  d2_3 d6_3
drop g1* g2* g3* g4*
rename  d6 g1
drop d7* d8* d9*

/*Informal Charges*/
rename d10 g3
rename  d11 g4
rename g5 h7_9
rename  d12 g5
rename  d13_2010 g7
drop d14_2010 
replace g7 = 0 if g7 == 2
replace g7 = . if g7 == 3
rename g6 g8
rename  d14_1 g9_1
rename  d14_2 g9_2
rename  d14_3 g9_4

/*Services*/
drop  e1_3 e1_4  e1_6
rename e1_7 e1_3
rename e1_5 e1_7
rename e1_8 e1_12

drop  e2 e3_2010 e3_2010_1 e4 e5 e6 e7*  

/*Access to Finance*/
rename e11 e2
generate e2_4=1 if  e11_1_1==1
replace e2_4=2 if  e11_1_2==1
replace e2_4=3 if  e11_1_3==1
replace e2_4=.b if e2_4==.
rename e11_2 e2_1
rename  e11_3 e2_3
drop  e11_4 e11_5*
rename e12_1 e3_1
rename e12_2 e3_2
rename e12_3 e3_3
rename e12_4 e3_4

/*Transparency*/
drop  f1_12 f1_13
rename f1_11 f1_13
drop  f1_10
rename f1_9 f1_11
rename f1_8 f1_10
rename  f1_7 f1_9
rename  f1_6 f1_8
rename f1_5 f1_7
rename  f1_4 f1_6
rename f1_3 f1_5
rename f4 f3

drop  f4_1 f4_1_6 f6*

rename f5 f8_1
rename f7 f5
rename f8 f6
drop  f9 f10

generate mobile=1 if h6 !=""
replace mobile=0 if h6==""
lab var mobile "Firm listed a province other than home as possible investment location"

drop h1_1
rename  h2_1 h7_1
rename  h2_2 h7_2
rename  h2_3 h7_3
rename  h2_4 h7_4
rename  h2_5 h7_12
rename h2_6 h7_11
rename h4 h7_10
drop  h2_7 h3_1 h3_2 h3_3 h3_4 h3_5 h3_5 h3_5_1 h4_1_1 h4_1_2 h4_1_3 h4_1_4 h4_1_5 h4_1_5_1 h5 

generate year=2010
order year pci_id
sort pci_id year

tempfile pci2010
save `pci2010',replace


/*****************************************PCI 2011**********************************************************************************************/

use "${dir_data_original}/PCI/PCI2011_Web.dta", clear

*DROP UNNECCESSARY VARIABLES 
drop a11 _merge svy_format online c1_new c2_new c4_2010_new c4_new c5_1month c5_3month LURCwait_new ownland LURC expropriation_risk fair_compensation ///
 landprice no_landproblems legal planning planning_avg legal_avg f2_new d14_3_new f4_new f5_new f5_1_new f8_new province web_09 web_10 change_web ///
 WEB d6_new d6_new par1_capacity par2_transcosts par3_paper par4_fees par6_nothing d10_new d11_new d12_new d13_new d14_2_new form2 c6_2010_new ///
 d14_2010_new c6_2010b d14_2010a d14_2010b h1_new h1_new h21_new bad_implementation h22_new h23_new h24_new e711_use e711_priv e721_use e721_priv ///
 e731_priv e741_use e741_priv e751_use e761_priv e715_continue e725_continue e735_continue e745_continue e755_continue e17_new e18_new use_exchange ///
 private_exchange use_again num_ent num_emp total_salary _nonstate ln_avg_priv_salary ln_num_ent sector e9_resid _Isector_2 _Isector_2 _Isector_3 ///
 _Isector_4 _Isector_5 _Ia4_2 _Ia4_3 _Ia4_5 _Ia4_6 _Ia4_7 e8_resid satisfied_labor c6_2010a e731_use e751_priv e761_use e765_continue total_income _Ia4_4 ///
 appeal propertyrt used_legal g3_2_imp g3_3_imp invest_size e1_1_new e1_2_new e1_4_new e1_5_new e1_6_new infra_quality e2_impute e3_impute ///
 e4_impute e5_impute e6_impute email_dich position answerdate

/*Name of A4 (Legal form must change*/
rename a4 a5
replace a5=5 if a5>4 & a5 != . 
drop  a4_1
drop a6*

rename a5_1 a6_1
rename a5_2 a6_2
rename a5_3 a6_3
rename a5_4 a6_4

drop  a8_1_1 a8_1_2 a8_1_3

/*Rename Equity and Employment Variables*/
rename a8_1 a9_1
rename a8_2 a9_2
rename a8_3 a9_3

rename a7_1 a8_1
rename a7_2 a8_2
rename a7_3 a8_3

*SWITCH RANKING OF INVESTMENT PLANS 
/*Investment Plans*/
replace a10 = 7-a10

/*Profitability*/
rename a9 a11_3

/*Firm History*/
#delimit cr
rename a13_1 h5_1
rename a13_2 h5_2
rename a13_3 h5_9
rename a13_4 h5_3
rename a13_5 h5_4
rename a14_3 h5_5
replace h5_5=1 if a14_4==1
rename a14_5 h5_6
rename a14_6 h5_7
rename a14_1 h5_10
rename a14_2 h5_11
drop a14_4

/*Major Customer*/
generate a13_1=a11_a+a11_b
drop a11_a a11_b
rename a11_c a13_2
rename a11_d a13_3
rename a11_e a13_4
rename a11_f a13_5

/*Land*/
generate ownland=1 if  b2_1==1|b2_2==1|b2_3==1|b2_6==1
replace ownland=0 if ownland==.
generate renter=1 if b2_4==1|b2_5==1
replace renter=0 if renter==.
generate iz=1 if  b3==1
replace iz=0 if  iz==.
rename   b4_1 b4_1_1
rename b4 b4_1
rename   b4_2 b4_1_2
rename  b4_3 b4_1_4
rename  b4_4 b4_1_5
drop b2* b3* b5 b6 b7*

/*Time Costs*/
drop c3 c6
replace c4=c4 if c4_2010==1
replace c4=0 if c4_2010==0
drop  c4_2010 c4_2_1 c4_2_2 c4_2_3 c4_2_4 c4_3
rename c4 c3
rename c5 c4
drop d2 d5
rename  d2_1 d6_1
rename  d2_2 d6_2
rename  d2_3 d6_3
drop g1* g2* g3* g4*
rename  d6 g1
drop d7* d8* d9*

/*Informal Charges*/
rename d10 g3
rename  d11 g4
rename g5 h7_9
rename  d12 g5
rename  d13_2010 g7
drop d14_2010 
replace g7 = 0 if g7 == 2
replace g7 = . if g7 == 3
rename g6 g8
rename  d14_1 g9_1
rename  d14_2 g9_2
rename  d14_3 g9_4

/*Services*/
drop  e1_3 e1_4  e1_6
rename e1_7 e1_3
rename e1_5 e1_7
rename e1_8 e1_12

drop  e2 e3_2010 e3_2010_1 e4 e5 e6 e7*  

/*Access to Finance*/
rename e11 e2
generate e2_4=1 if  e11_1_1==1
replace e2_4=2 if  e11_1_2==1
replace e2_4=3 if  e11_1_3==1
replace e2_4=.b if e2_4==.
rename e11_2 e2_1
rename  e11_3 e2_3
rename e12_1 e3_1
rename e12_2 e3_2
rename e12_3 e3_3
rename e12_4 e3_4

/*Transparency*/
drop  f1_12 f1_13
rename f1_11 f1_13
drop  f1_10
rename f1_9 f1_11
rename f1_8 f1_10
rename  f1_7 f1_9
rename  f1_6 f1_8
rename f1_5 f1_7
rename  f1_4 f1_6
rename f1_3 f1_5
rename f4 f3

drop  f4_1 f4_1_6 f6*

rename f5 f8_1
rename f7 f5
rename f8 f6
drop  f9 f10

generate mobile=1 if h6 !=""
replace mobile=0 if h6==""
lab var mobile "Firm listed a province other than home as possible investment location"

drop h1_1
rename  h2_1 h7_1
rename  h2_2 h7_2
rename  h2_3 h7_3
rename  h2_4 h7_4
rename  h2_5 h7_12
rename h2_7 h7_11
rename h4 h7_10
drop   h3_1 h3_2 h3_4 h3_5 h3_5 h3_5_1 h4_1_1 h4_1_2 h4_1_3 h4_1_4 h4_1_5 h4_1_5_1 h5 


generate year=2011
order year pci_id
sort pci_id year

tempfile pci2011
save `pci2011',replace


/**************************************************Building Dataset*********************************************************************/

clear all
set mem 600m
set matsize 900
set more off
use `pci2007', clear
append using `pci2008'
append using `pci2009'
append using `pci2010'
append using `pci2011'

order year pci_id pci_id2 id2006

tempfile pci_repeated_cross
save `pci_repeated_cross',replace

/*************************************************Further Cleaning*************************************************************************************/
/*Corrections*/
drop  e2_4 f1_3 f1_4 f1_16 f7 a8_1_1 a8_1_2 a8_1_3 e8 e9 e10 form2 type province 
drop b6_1 c2_1 e11_1_1 e11_1_2 e11_1_3 e11_1_3_1 h3 _Ia4_3 manufacturing  f4 
drop  a4 a13 b4_2 b4_3 b4_3 b4_3_2 b4_3_2 b5_2 b5_3

*NOT SURE WHY THIS HAPPENS* 
replace a6_4=1 if a5_5==1
drop a5_5

*CORRECT FOR 2009 CODING 
foreach x in  f1_1 f1_2 f1_5 f1_6 f1_7 f1_8 f1_9 f1_10 f1_11 /*f1_12*/ f1_13{
replace `x'=`x'-1 if `x'>2 & year==2009
}

*CORRECT SO MISSING = .
replace a1=. if a1==0
replace a2=. if a2==0

*CORRECT FOR 2009/2010 CODING 
replace c4=6 if c4==7
replace d1=2 if d1==-9
replace g4=5 if g4==4 & year>2008
replace g4=g4-1 if g4>4 & year>2008

*CLEAN B4_1_1 YEAR VARIABLE
replace b4_1_1=1994 if b4_1_1==-9
replace b4_1_1=1990 if b4_1_1== 19801990
replace b4_1_1=1994 if b4_1_1== 19891994
replace b4_1_1=2004 if b4_1_1== 19902004
replace b4_1_1=2004 if b4_1_1== 19932004
replace b4_1_1=2004 if b4_1_1== 19972004
replace b4_1_1=2006 if b4_1_1== 20012006
replace b4_1_1=2002 if b4_1_1==20032002
replace b4_1_1=2000 if b4_1_1== 20052000
replace b4_1_1=2005 if b4_1_1==2055
replace b4_1_1=2000 if b4_1_1==200
replace b4_1_1=1999 if b4_1_1==199
replace b4_1_1=a2+2 if b4_1_1<1000
replace b4_1_1=2000 if id=="VLO070" & year==2006
replace b4_1_1=1985 if id=="VLO095" & year==2006

*NOT SURE WHY THIS HAPPENS*
replace b4_1_2=30 if b4_1_2==-8
replace c3=4 if c3==-9
replace c1=7 if c1==-8
replace c1=30 if c1==-9

lab var a8_3 "Total Assets at Time t"
lab var a8_2 "Total Assets at Time t-1"
lab var a8_1 "Total Assets at Establishment"
lab var a9_3 "Total Employment at Time t"
lab var a9_2 "Total Employment at Time t-1"
lab var a9_1 "Total Employment at Establishment"
lab var a11_3 "Net Profit/Losses in at Time t"

*CHANGE RELATIONSHIP VARIABLE SO YES = 1, NO = 0
replace f2=1 if f2<3 & year<2009
replace f2=0 if f2>=3 & year<2009 & f2 !=. & f2 != .b
replace f2=2-f2 if year==2009

*CREATE SECTOR VARIABLE 
generate sector=1 if a6_1>50 & a6_1 !=. & a6_1 !=.b & a6_1 !=.a & year<2009
replace sector=1 if a6_2>50 & a6_2 !=. & a6_2 !=.b & a6_2 !=.a & year<2009
replace sector=3 if a6_3>50 & a6_3 !=. & a6_3 !=.b & a6_3 !=.a & year<2009
replace sector=4 if a6_4>50 & a6_4 !=. & a6_4 !=.b & a6_4 !=.a & year<2009

replace sector=1 if a6_1==1 & year==2011
replace sector=1 if a6_2==1 & year==2011
replace sector=3 if a6_3==1 & year==2011
replace sector=4 if a6_4==1 & year==2011

replace sector=1 if a6_1==1 & year==2010
replace sector=1 if a6_2==1 & year==2010
replace sector=3 if a6_3==1 & year==2010
replace sector=4 if a6_4==1 & year==2010

replace sector=1 if a6_1==1 & year==2009
replace sector=3 if a6_2==1 & year==2009
replace sector=4 if a6_3==1 & year==2009
replace sector=4 if a6_4==1 & year==2009

*CHANGE VARIABLE SO YES = 1, NO = 0 
replace g7=2-g7 if year==2009

label define sector 1 "Manufacturing/Construction" 3 "Service/Commerce" 4 "Agr/Aqua/NatRes"
label values sector sector

foreach x in a13_1 a13_2 a13_3 a13_4 a13_5 {
	replace `x'=0 if  `x'==. | `x'==.a |`x'==.b & year>=2010
}

replace a5=a5+1 if year==2010 & a5>1  & a5<=4
replace a5=5 if a5>5

lab var ownland "Firm owns land it occupies"
lab var renter "Firm rents land"
lab var iz "Firm located in iz"

*Clean year variable
replace a1=2004 if a1==204
replace a2=1998 if a2==998

*Clean "Changes" variable
replace f3 = 0 if f3 ==1 & year != 2010 | f3 == 2& year != 2010 
replace f3 = 0.5 if f3 == 3
replace f3 = 1 if f3 == 4 | f3 ==5

lab def f3 0 "no" 1 "yes", modify
lab var g3  "It's common for firms to pay informal charges for firms like mine"

drop  f5_2 f5_1 

*Merge in cleaned operations variables (we cleaned the open-ended, text based answers)
preserve
use "${dir_data_original}/PCI/operations_allyear",clear
tempfile operations
save `operations',replace
restore

merge 1:1 id year using `operations'
drop if _m==2
drop _m

ren operation_no operations

sort pci_id year
order  year pci_id prov id form  a1 a2 a3 a5 sector ownland renter iz

/***********************************************************MERGE IN ISIC CODES****************************************************/

* ISIC codes were hand-coded in Excel files
* They were saved as ISIC_2009.dta and so forth, which are called below

tempfile temp
save `temp'

use "${dir_data_original}/Crosswalks/ISIC_2011.dta",clear
bys id: keep if _n==1
gen ISIC1 = trim(codea6_1)
replace ISIC1 = substr(ISIC1,1,4) 
gen year = 2011
keep year id ISIC1
compress
codebook ISIC1
merge 1:m year id using `temp'
drop if _merge==1
drop _merge

save `temp', replace
use "${dir_data_original}/Crosswalks/ISIC_2010.dta",clear
gen ISIC1 = trim(codea6_1)
replace ISIC1 = substr(ISIC1,1,4) 
gen year = 2010
keep year id ISIC1
compress
codebook ISIC1
merge 1:m year id using `temp'
drop if _merge==1
drop _merge

save `temp', replace
use "${dir_data_original}/Crosswalks/ISIC_2009.dta",clear
gen ISIC1 = trim( a6_1_code)
replace ISIC1 = substr(ISIC1,1,4) 
gen year = 2009
keep year id ISIC1
bys id: keep if _n==1  // duplicates in the excel file...
codebook ISIC1
merge 1:m year id using `temp'
drop if _merge==1
drop _merge

save `temp', replace
use "${dir_data_original}/Crosswalks/ISIC_2008.dta",clear
gen ISIC1 = trim(a7_1_code)
replace ISIC1 = substr(ISIC1,1,4)
gen year = 2008 
keep year id ISIC1 
compress
codebook ISIC1
merge 1:m year id using `temp'
drop if _merge==1
drop _merge

sort year id
save `temp', replace
use "${dir_data_original}/Crosswalks/ISIC_2007.dta",clear
gen year = 2007
gen ISIC1 = substr(isic1,1,4)
keep year id ISIC1
compress
codebook ISIC1
merge 1:m year id using `temp'
drop if _merge==1
drop _merge

gen isic_rev4_4digit = ISIC1
lab var isic_rev4_4digit "ISIC code, 4 digit"

replace isic_rev4_4digit = isic_rev4_4digit + "0" if length(isic_rev4_4digit)==1

g isic_rev4_2digit = substr(isic_rev4_4digit, 1,2) 
replace isic_rev4_2digit = "" if real(isic_rev4_2dig)>99 | real(isic_rev4_2dig)<1

tab year if isic_rev4_2digit==""

/***********************************************************SAVING****************************************************/

lab var  sector "General Sector"
lab var year "Year of Survey"
order  year pci_id prov id form  a1 a2 a3 a5 sector ISIC* ownland renter iz mobile 
compress
save "${dir_data_coded}/PCI_repeated_cross_CLEAN.dta", replace






